# -*- encoding: utf-8 -*-
# @Author     : yuxian
# @Email      : 1503889663@qq.com
# @File       : 正则文本清洗.py (regex-based text cleaning)
# @SoftWare   : PyCharm
import os
import re
import enchant
import nltk
import openpyxl
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

# Shared US-English dictionary used below for spell checking and suggestions.
en_dict = enchant.Dict("en_US")
# Alias for en_dict; not referenced elsewhere in this chunk — presumably kept
# for external callers. TODO confirm before removing.
checker = en_dict


def clean_cleaned_text(cleaned_text: str) -> str:
    """Normalize an English text (apparently job postings) via a regex pipeline.

    Steps, in order: drop non-ASCII characters, strip noise tokens, normalize
    abbreviations ("e.g.", "U.S.", "%", ...), rewrite "/" as "per"/"or",
    convert "&" and "|", normalize dollar amounts/ranges, split glued
    digit+letter runs, spell-correct run-together CamelCase words via enchant,
    pad punctuation with spaces, and collapse whitespace.

    NOTE: rule order matters — later substitutions assume earlier ones
    have already run.  Returns the cleaned single-line string.
    """
    # Drop every character outside ASCII (removes mis-encoded bytes).
    cleaned_text = ''.join([char for char in cleaned_text if ord(char) < 128])
    # Remove "*", "(s)", "'s" and free-standing " - " separators.
    cleaned_text = re.sub("[*]|\(s\)|\'s|\s+-\s+", " ", cleaned_text)
    # Remove "/>", "Zone N(:)", phone/TTY labels, and ids like "146293Job".
    cleaned_text = re.sub("/>|Zone\s+\d+(:)?|phone:|TTY:|tty:|\d+[Jj]ob", " ", cleaned_text)

    # Miscellaneous replacements: abbreviations, place names, symbols.
    cleaned_text = re.sub("401\(k\)", "401k", cleaned_text)
    cleaned_text = re.sub("San Francisco(,)\s+CA", "San Francisco, California", cleaned_text)
    cleaned_text = re.sub("e\.g\.", " eg ", cleaned_text, flags=re.IGNORECASE)
    cleaned_text = re.sub("b\.g\.", " bg ", cleaned_text, flags=re.IGNORECASE)
    cleaned_text = re.sub("D\.C\.", "DC ", cleaned_text, flags=re.IGNORECASE)
    cleaned_text = re.sub("i\.e\.", " ie ", cleaned_text, flags=re.IGNORECASE)
    cleaned_text = re.sub("[Ii]nc\.", " Inc ", cleaned_text, flags=re.IGNORECASE)
    cleaned_text = re.sub('%', " percent ", cleaned_text)
    cleaned_text = re.sub(r" (the[\s]+|The[\s]+)?U\.S\.(A)? ", " America ", cleaned_text)
    cleaned_text = re.sub(r"U(\.)?S(\.)?(A)?", " America ", cleaned_text)
    cleaned_text = cleaned_text.replace("\\n", " ")
    cleaned_text = cleaned_text.replace("\\", " ")

    # "/" handling, pass 1: rate expressions like "days/week", "120000/year".
    def rule1(patter):
        matched_string = patter.group(0)
        # Rewrite the slash in a rate expression as the word "per".
        return matched_string.replace("/", " per ")

    cleaned_text = re.sub("day(s)?/week|\d+(\s)?/year(s)?", rule1, cleaned_text)
    # Tighten " / " and "/or" down to a bare "/" before the next passes.
    cleaned_text = re.sub("\s+/\s+|/[Oo][Rr]", "/", cleaned_text)

    # "/" handling, pass 2: alternatives like "R/Python" become "R or Python".
    def rule2(patter):
        matched_string = patter.group(0)
        return matched_string.replace("/", " or ")

    cleaned_text = re.sub("[A-Za-z]+/[A-Za-z]+", rule2, cleaned_text)
    cleaned_text = re.sub("[A-Za-z]+/\d+", rule2, cleaned_text)
    cleaned_text = re.sub("\d+/[A-Za-z]+", rule2, cleaned_text)
    # Any remaining slash also reads as an alternative.
    cleaned_text = re.sub("/", " or ", cleaned_text)

    # "&" becomes the word "and".
    cleaned_text = re.sub("\s+&(\s)?", " and ", cleaned_text)
    # "|" becomes a comma separator.
    cleaned_text = re.sub("\|", ", ", cleaned_text)

    # Dollar amounts: "$109,500.00 - $187,770.00" -> "dollar109500 - dollar187770".
    def rule3(patter):
        matched_string = patter.group(0)
        if "." in matched_string:
            # Drop a zero cents part (e.g. ".00"); int("00") == 0 triggers this.
            if int(matched_string.split(".")[-1]) <= 0:
                matched_string = re.sub("\.\d+", " ", matched_string)

        matched_string = matched_string.replace("between", "").replace("and", "-")
        return matched_string.replace(",", "").replace(" ", "").replace("$", "dollar").replace("-", " - ") + " "

    cleaned_text = re.sub("\$\d+(,)?\d+(\.)?\d+(\s)?-(\s)?\$\d+(,)?\d+(\.)?\d+", rule3, cleaned_text)
    cleaned_text = re.sub("\$\d+(,)?\d+(\.)?\d+(\s)?to(\s)?\$\d+(,)?\d+(\.)?\d+", rule3, cleaned_text)
    cleaned_text = re.sub("between\s+\$\d+(,)?\d+(\.)?\d+(\s+)?and(\s+)?\$\d+(,)?\d+(\.)?\d+", rule3, cleaned_text)
    cleaned_text = re.sub("\$\d+(,)?\d+(\.)?\d+(\s)?", rule3, cleaned_text)

    # Other special tokens: hashtags, @mentions, "[ Link removed ]",
    # "(123)", "[ Email address blocked ]".
    cleaned_text = re.sub("#.*?\s|@.*?\s|\[\s+[Ll]ink\s+removed\s+]|\(\d+\)|\[\s+Email\s+address\s+blocked\s+]", " ",
                          cleaned_text)

    # Split glued digit/letter runs ("146Job" -> "146 Job", "Job146" -> "Job 146").
    def rule4(patter):
        matched_string = patter.group(0)
        # NOTE(review): only the first digit run is used; assumes one run per match.
        digit = re.findall("\d+", matched_string)[0]
        if matched_string[0].isdigit():
            return digit + " " + matched_string.split(digit)[-1]
        else:
            return matched_string.split(digit)[-1] + " " + digit

    cleaned_text = re.sub("\d+[a-zA-Z]{3}", rule4, cleaned_text)
    cleaned_text = re.sub("[a-zA-Z]{3}\d+", rule4, cleaned_text)

    # Fix run-together CamelCase words via the spell checker, keeping a
    # whitelist of legitimate product names.
    def rule5(patter):
        matched_string = patter.group(0)
        # print(matched_string)
        if matched_string.lower() not in ["PowerPoint".lower(), "PowerShell".lower(), "WordPress".lower(),
                                          "MacBook".lower(), "YouTube".lower(), "TikTok".lower()]:
            # Take enchant's first suggestion when the word is not in the
            # dictionary; may raise IndexError if suggest() returns nothing.
            matched_string = en_dict.suggest(matched_string)[0] if not en_dict.check(matched_string) else matched_string
        return matched_string

    cleaned_text = re.sub("[A-Z]+[A-Z]+[a-z]+|([A-Z]+)?[a-z]+[A-Z]+([a-z]+)?", rule5, cleaned_text)

    # Currency words: "USD" / " dol " -> " dollar ".
    cleaned_text = re.sub("USD|\s+dol\s+", " dollar ", cleaned_text)

    # Pad selected punctuation with spaces so it tokenizes separately.
    def pad_str(s):
        return ' ' + s + ' '

    def pad_pattern(pattern):
        matched_string = pattern.group(0)
        return pad_str(matched_string)

    cleaned_text = re.sub('[\!\?\^\+\*\/\~\|\`\=\:\;\.\\\]', pad_pattern, cleaned_text)

    # def quoted_string_parser(pattern):
    #     string = pattern.group(0)
    #     parsed = self.nlp(string[1:-1])
    #     is_meaningful = False
    #     for token in parsed:
    #         # if one of the token is meaningful, we'll consider the full string is meaningful
    #         if len(token.text) > 2 and not token.text.isdigit() and token.has_vector:
    #             is_meaningful = True
    #     if is_meaningful:
    #         return string
    #     else:
    #         return ''
    #
    # cleaned_text = re.sub('\".*?\"', quoted_string_parser, cleaned_text)
    # cleaned_text = re.sub("\'.*?\'", quoted_string_parser, cleaned_text)
    # cleaned_text = re.sub("\(.*?\)", quoted_string_parser, cleaned_text)
    # cleaned_text = re.sub("\[.*?\]", quoted_string_parser, cleaned_text)
    # cleaned_text = re.sub("\{.*?\}", quoted_string_parser, cleaned_text)
    # cleaned_text = re.sub(' s ', " ", cleaned_text)

    # Collapse newlines and redundant whitespace into single spaces.
    cleaned_text = " ".join([word.strip() for word in cleaned_text.split()])
    return cleaned_text


def fenJu(text):
    """Normalize newlines in *text* into sentence breaks.

    A newline that directly joins two word characters is treated as a
    sentence boundary and replaced by " . ".  A newline preceded by a
    non-word character also becomes " . ", unless the character before it
    is already sentence-ending punctuation (. : ? ! ;), in which case the
    newline collapses to a plain space.  Literal "\\n" escape sequences
    are dropped and all whitespace runs are normalized to single spaces.

    Fix over the original: the cleaned string is now *returned* instead of
    being computed and discarded (the print side effect is preserved, so
    existing callers are unaffected).
    """
    # word-char, newline(s), word-char: insert an explicit sentence dot.
    cleaned_text = re.sub(r"[^\W]\n+\w",
                          lambda m: re.sub(r"\n+", " . ", m.group(0)), text)

    def _break_or_join(match):
        # Keep existing end-of-sentence punctuation; otherwise add " . ".
        head = match.group(0).strip()[0]
        sep = " " if head in {".", ":", "?", "!", ";"} else " . "
        return re.sub(r"\n+", sep, match.group(0))

    # non-word char, newline(s), any char: break or join depending on context.
    cleaned_text = re.sub(r"\W\n+.", _break_or_join, cleaned_text)
    # Drop literal backslash-n sequences left over from serialization.
    cleaned_text = cleaned_text.replace("\\n", " ")
    # Collapse all whitespace runs to single spaces.
    cleaned_text = " ".join(cleaned_text.split())
    print(cleaned_text)
    return cleaned_text


def zh_clear(cleaned_text: str) -> str:
    """Clean a Chinese text (apparently job postings) via an ordered regex pipeline.

    Steps: verbalize comparison operators, strip emails/URLs/HTML, remove
    list-item numbering while trying to preserve numbers that carry meaning
    (ages, years, salaries), and drop bullet/invisible characters.

    NOTE: rule order matters — e.g. "<" is replaced before "<=", so "<="
    only yields the correct "小于等于" because the later "=" rule finishes
    the job.  Returns the cleaned string (also printed).
    """
    # Verbalize comparison operators into Chinese words.
    cleaned_text = re.sub(r'<', "小于", cleaned_text)
    # "<=" can no longer match here (the "<" was consumed above); this line
    # only handles the "≦" glyph.  The "=" rule below completes "<=".
    cleaned_text = re.sub(r'<=|≦', "小于等于", cleaned_text)
    cleaned_text = re.sub(r'>=|≥', "大于等于", cleaned_text)
    cleaned_text = re.sub(r'>', "大于", cleaned_text)
    cleaned_text = re.sub(r'=', "等于", cleaned_text)

    # Strip email-address tails of the form "@domain.tld".
    cleaned_text = re.sub(r'@\w+\.\w+', ' ', cleaned_text)
    # NOTE(review): "[http|https]*" is a character class, not an alternation —
    # it matches any run of the letters h/t/p/s and "|"; possibly intended
    # "(http|https)".  Confirm before changing.
    patter = re.compile(r'[http|https]*://[a-zA-Z0-9.?/&=:]*|www\.[a-zA-Z0-9.?/&=:]*', re.S)
    cleaned_text = re.sub(patter, '', cleaned_text)
    cleaned_text = re.sub(r"\w+\.(edu|org|com|info)", '', cleaned_text)
    # Strip HTML-like tags.
    pattern = re.compile('(<)?(style=)?.*?>')
    cleaned_text = re.sub(pattern, ' ', cleaned_text)
    # Strip list-item numbering ("1," / "1." / "1:" before a Chinese char),
    # keeping the Chinese character that follows.
    cleaned_text = re.sub(r"\d,[\u4e00-\u9fa5]", lambda x: ", " + x.group(0)[-2:], cleaned_text)
    cleaned_text = re.sub(r"\d(\.|．)[\u4e00-\u9fa5]", lambda x: ", " + x.group(0)[-2:], cleaned_text)
    cleaned_text = re.sub(r"\d:[\u4e00-\u9fa5]", lambda x: ", " + x.group(0)[-2:], cleaned_text)
    # "N.M-K年" (e.g. item number glued to a year range): drop the item number.
    cleaned_text = re.sub(r"\d(\.|．)\d-\d年", lambda x: x.group(0)[2:], cleaned_text)
    cleaned_text = re.sub(r"\s?\d(,|\.)\s", ', ', cleaned_text)
    cleaned_text = re.sub(r'（\d+）', ' ', cleaned_text)
    # Numbered items at line starts (e.g. "\n3." or "^3.") become commas.
    cleaned_text = re.sub(r'\n+\d+(.)?(\.|．)|^\d+(.)?(\.|．)', ", ", cleaned_text)
    # "N-x": drop the leading digits and dash when followed by a non-digit.
    cleaned_text = re.sub(r"\d+-\w", lambda x: x.group(0)[-1] if not x.group(0)[-1].isdigit() else x.group(0), cleaned_text)
    # NOTE(review): "(\.|．){5}" requires five consecutive dots — possibly
    # intended ".{5}" (any five chars); confirm the author's intent.
    # Keeps the match when it looks like a year/amount, otherwise strips the numbering.
    cleaned_text = re.sub(r'\n+\d(\.|．){5}', lambda x: x.group(0) if "year" in x.group(0) or "年" in x.group(0) or "万" in x.group(0) or "元" in x.group(0) or len(re.findall("\d", x.group(0))) > 2 else re.sub(r"\d+(\.|．)?", ", ", x.group(0)).strip(), cleaned_text)
    # Drop single latin letters with a dot ("a.") and escaped whitespace tokens.
    cleaned_text = re.sub(r'[a-z]\.|\\n|\\r|\\t', ' ', cleaned_text)
    # Chinese-numeral list markers: "一、", "二,", "（三）", " 1）".
    cleaned_text = re.sub(r'[一二三四五六七八九十]、|[一二三四五六七八九十],', ' ', cleaned_text)
    cleaned_text = re.sub(r'（[一二三四五六七八九十]）', ' ', cleaned_text)
    cleaned_text = re.sub(r'\s\d+）', ' ', cleaned_text)
    # Invisible/private-use characters and bullet glyphs (as literal "\uXXXX"
    # escape text left over from serialization).
    cleaned_text = re.sub(
        r"\\uf09e|\\uf09f|\\uf0b7|\\x9f|\\u200b|\\u2002|\\uf06c|\\xa0|\\u3000|\\uf0fc|\\uf0d8|\\ufeff|\\u2028|●|▪", " ",
        cleaned_text)
    cleaned_text = re.sub(r'①|②|③|④|⑤|⑥|⑦|⑧|⑨|⑩|Ø', ' ', cleaned_text)

    def check_contain(string_, list_):
        # True if any element of list_ occurs in string_.
        for i in list_:
            if i in string_:
                return True
        else:
            # for/else: reached only when the loop finishes without a match.
            return False

    def rule1(patten):
        # Strip list numbering from a digit-in-context window, but leave the
        # digit alone when it is part of a time/age/amount expression.
        matched_string = patten.group(0)
        filter_list = ["早", "晚", "下午", "上午", "天", "个", "岁", "月", "元", "～", "-", "——", "号", "路"]
        if check_contain(matched_string, filter_list):
            # Contains a unit/date marker — keep the number as-is.
            pass
        else:
            if len(re.findall("\d", matched_string)) < 2 and "年" not in matched_string:
                # Lone digit used as a list marker: turn it into a comma.
                matched_string = re.sub(r"\d\s,", ', ', matched_string)
                matched_string = re.sub(r"\d\s(\.|．)", ', ', matched_string)
                matched_string = re.sub(r"\d,", ', ', matched_string)
                matched_string = re.sub(r"\d(\.|．)", ', ', matched_string)
            else:
                if "年" in matched_string:
                    matched_string = re.sub(r"\d(\.|．)", ", ", matched_string)
                if "." in matched_string:
                    # "N.xx" not followed by 元/digit: treat as numbering.
                    matched_string = re.sub(r"\d(\.|．)(\d)?(\d)?[^元0-9]", lambda x: ", " + x.group(0)[-1], matched_string)
                if re.findall(",\s\d\w", matched_string):
                    # ", N<word>": drop the digit unless it carries a unit.
                    matched_string = re.sub(r",\s+\d\w", lambda x: " " + x.group(0)[-1] if x.group(0)[-1] not in ["早", "晚", "下午", "上午", "天", "个", "岁", "月", "元", "～", "-", "——", "号", "：", "路", "年"] else x.group(0), matched_string)
        return matched_string

    # Apply rule1 over two window sizes around each digit.
    cleaned_text = re.sub(r".{3}\d.{3}", rule1, cleaned_text)
    cleaned_text = re.sub(r".{5}\d.{7}", rule1, cleaned_text)
    # "N.M-K岁/天": drop the leading item number, keep the range.
    cleaned_text = re.sub("\d(\.|．)\d+(-|~)\d+(周)?(岁|天)", lambda x: x.group(0)[2:], cleaned_text)
    cleaned_text = re.sub("\d\.\d+(周)?岁", lambda x: x.group(0)[2:], cleaned_text)
    # Remaining "N、" list markers and stray ", N," fragments become commas.
    cleaned_text = re.sub(r"\d、|\d\s、", ', ', cleaned_text)
    cleaned_text = re.sub(r"\s\d,\s?", ', ', cleaned_text)
    print(cleaned_text)
    return cleaned_text


if __name__ == '__main__':
    # 初始化SentimentIntensityAnalyzer对象
    sia = SentimentIntensityAnalyzer()
    str1 = """Job Requisition ID # . predisposition OverviewAutodesk's Customer Experience (CX) Analytics team is looking for a 
passionate and driven Data Analyst with proven experience driving organizational change through rigorous data 
analysis. This role will perform deep data mining and statistical behavioral analysis of Autodesk's customer data 
and will partner with internal business stakeholders to refine key success measures and find insights to drive 
program efficiency. They will play a significant role in shaping a customer-focused and data-driven culture. Join a
dynamic team that is helping to transform business decisions and processes with actionable customer insights gained
from meaningful research, analysis and measurement of the Autodesk customer experience. Job Title:Lead Business 
Intelligence AnalystLocation: San Francisco, Irresponsibility collaboratively and cross functionally to define and 
meet stakeholder requirements Translate business objectives into technical data requirements and balance them with 
technical feasibility, recommending changes in development, maintenance and platform standards as necessary Perform
deep dive analyses to understand trends, anomalies and insights that will drive operational improvements Craft data
stories through presentations, written summaries, and data visualizations that accurately outline problem 
statements and provide actionable and unbiased intelligence and recommendations Communicate findings from 
initiatives with clarity and accountability to the broader organization/stakeholdersClearly document provenance of 
data, ETL logic, and code used to develop models Proactively identify areas in which analytics efforts can answer 
business questions, drive operational improvements and business value Spearhead the development of insight-driven 
tools & dashboards and provide ongoing support regarding functionality and 
data-integrityMinimumRequirementsProficiency using SQL to query large proficiency with Google Analytics and/or 
Adobe Analyticalally experience with Microsoft Excel (pivot tables, advanced modeling, creating charts/graphs) and 
PowerPoint with BI development and database systems such as PowerBI, Looker and Tableau Experience with scripting 
languages such as R, Python or equivalent Experience with Qubole, Snowflake or equivalent The Ideal Candidate 
Extensive experience in roles combining data analysis/business intelligence, research and strategy Strong 
problem-solving skills and sharp business judgmentData-curious, interested in telling the story behind the 
dataDetail-oriented, ensuring data accuracy and consistency Exceptional communication skills, attentive listener 
and compelling influence to new ideas and respectful of differing opinions/perspectivesAddresses difficult problems
head-on and challenges the status quo Can adapt to change and is open to learning new skillsSelf-motivated and can 
work independently in a fast-paced environment Highly collaborative and can work cross-functionally, while 
cultivating relationships with colleagues and stakeholders Passionate about improving the customer experience Click
below to learn more about our benefits in the US.At Autodesk, we're building a diverse workplace and an inclusive 
culture to give more people the chance to imagine, design, and make a better world. Autodesk is proud to be an 
equal opportunity employer and considers all qualified applicants for employment without regard to race, color, 
religion, age, sex, sexual orientation, gender, gender identity, national origin, disability, veteran status or any
other legally protected characteristic. We also consider for employment all qualified applicants regardless of 
criminal histories, consistent with applicable law. Are you an existing contractor or consultant with Autodesk? . 
Please search for open jobs and apply internally (not on this external site). If you have any questions or require 
support, contact Autodesk Careers. Salary is one part of Autodesk's competitive package. For U.S.-based roles, we 
expect a starting base salary between $109,500 and $187,770. Offers are based on the candidate's experience and 
geographic location, and may exceed this range. In addition to base salaries, we also have a significant emphasis 
on annual cash bonuses, commissions for sales roles, stock grants, and a comprehensive benefits package. 
SummaryLocation:San Francisco, CA, USAType: Full timescale .Summary: The Business Analyst I possesses the fundamentals of analytical, strategic and project management skills. 
They need to have strong initiative, communication, and organizational skills to manage multiple project timelines.
The Business Analyst I will support all business areas by bridging the technical knowhow with the operational 
expertise of the business owners. This position is key to understanding and documenting capabilities needed to 
address business challenges with technical solutions. The Business Analyst I will assist in requirements 
development by reviewing business processes and systems to identify and address operational efficiencies and 
improvements. This position will act as the liaison among internal business and technical stakeholders to analyze, 
document, communicate and validate business and system requirements for platform applications. Other duties as 
assigned. Business Analyst I: The Business Analyst I will act in a support role and be given project tasks, as 
overseen by the project lead. Reporting will be given at limited complexity as this role continues to build their 
familiarity with our business and data structure. Work is closely managed by the Business Intelligence Manager. 
Essential Duties and Responsibilities:Business Analyst I: Gather, organize, and create an execution plan for 
business and data requirements . Create reports, dashboards, and visualizations to understand business performance 
. Manage the day-to-day functions of data export jobs . Manage the cleanliness of master data and provide 
opportunities for training and better efficiencies . Recognize and adopt best practices in reporting and analysis: 
data integrity, test design, analysis, validation, and documentation . Coach, demonstrate, and uphold Friedman's 
Core Values . Maintain regular and dependable attendance and punctuality . Education/Experience: Minimum of 1-3 
years business or data analytics experience . Bachelor's degree in a related field such as Data Science, Business 
Analytics, Statistics, Business Management, Economics, or Computer Science . ITIL or a Business Analyst 
Certification, a plus . Knowledge, Skills and Abilities: Intermediate Microsoft Suite, Word, Excel and PowerPoint 
Skills . Experience with reporting tools such as Power Bi . Ability to multitask across multiple lines and daily 
tasks . Experience working with ERP systems, such as Dynamics AX, D365 for F&O, Nav, a plus . Active listener with 
strong verbal & written communication skills . Self-starter who takes initiative . Detail-oriented and highly 
organized . Strong planning and strategic thinking . High Energy, Driven, Positive Attitude . Eagerness to learn, 
map & improve business processes . Excellent time management skills with a proven ability to meet deadlines and 
multi-task . Ability to work both independently and as part of a team, take initiative, and be proactive . Reliable
work habits; accurate, efficient, proactive, and self-motivated . Strong organizational skills and detail-oriented 
with ability to prioritize multiple assignments . Dependable time management skills with the ability to and handle 
simultaneous tasks and responsibilities . Effectively respond to all situations using sound judgement and 
decision-making skills . What's in Friedman's toolbox for Team Members: 401(k) match up to 3% and 50 cents on the 
dollar for any amount greater than 3% up to 5% Medical, Dental and Vision bundled benefit plan . Team Member 
discount on all merchandise . Casual dress code . Tuition reimbursement to further career path . Friedman's Home 
Improvement is proud to be an Equal Opportunity Employer, committed to a diverse and inclusive work environment. 
Friedman's Home Improvement will consider for employment qualified applicants with criminal convictions in a manner
consistent with AB 1008.SDL2017 .Requisition ID# 146236Job . Category: Business Operations / Strategy Job Level: Individual Contributor Business 
Unit: Energy Policy & Procurement Work Type: Hybrid Job Location: Oakland Department Overview Energy Policy & 
Procurement (EPP) secures electricity and natural gas from wholesale markets for PG&E customers to ensure system 
reliability. In addition, EPP is responsible for long-term planning, risk management, and compliance functions 
related to PG&E's energy portfolio. EPP also actively participates in regulatory and legislative proceedings that 
shape the design and implementation of state, regional, and federal energy regulation and policy. The Strategic 
Analyst position resides with the Bundled Portfolio Planning and Analysis (BPPA) team within EPP's Energy Policy 
Analysis and Design (EPAD) department. EPAD is responsible for forecasting volumes, costs and impacts of demand- 
and supply-side energy resources, and portfolio positions for commodity market products and services and compliance
instruments. The team is also responsible for market intelligence and assessment of supply-side and demand-side 
energy resources. EPAD supports long-term planning for supply-side and demand-side resources, electric transmission
and distribution infrastructure, and business strategies to meet the future energy needs of PG&E's customers 
reliably, at a reasonable cost, and consistent with company and regulatory/legislative environmental policies. 
Position SummaryPG&E is seeking a highly motivated Analyst with experience in electric portfolio forecasting 
analytics, capacity expansion and production simulation modeling, supply-side resources, utility business model, 
and business process management. The analyst will work closely with stakeholders to develop forecasts of electric 
portfolio positions; generation and costs associated with utility owned and contracted generation resources; and 
costs associated with other market transactions that are required to serve PG&E's bundled electric load. The 
analyst will generate high-value business insights by identifying, framing and executing analyses related to the 
business impacts of forecasts. As part of this work, the analyst will systematically evaluate and recommend 
improvements to the forecasting process, models, inputs and platforms for production and delivery of forecasts. The
candidate will be capable of developing and managing cross-functional projects, products or processes. The 
candidate will also have excellent written and verbal communication skills, and demonstrated experience informing 
and influencing senior leadership and external stakeholders. PG&E is providing the salary range that the company in
good faith believes it might pay for this position at the time of the job posting. This compensation range is 
specific to the locality of the job. The actual salary paid to an individual will be based on multiple factors, 
including, but not limited to, specific skills, education, licenses or certifications, experience, market value, 
geographic location, and internal equity. We would not anticipate that the individual hired into this role would 
land at or near the top half of the range described below, but the decision will be dependent on the facts and 
circumstances of each case.​A reasonable salary range is:​Bay Area Minimum: $78,000.00Bay Area Mid-point: 
$97,000.00Bay Area Maximum: $116,000.00&/OR​California Minimum: $74,000.00California Mid-point: $92,000.00California
Maximum: $110,000.00This position is hybrid, working from your remote office and your assigned work location based 
on business need. The assigned work location will be within the PG&E Service Territory. Job 
ResponsibilitiesForecasting: Develop forecasts of electric portfolio positions; generation and costs associated 
with utility owned and contracted generation resources; and costs associated with other market transactions that 
are required to serve PG&E's bundled electric load. Ensure that the forecast is systematically and appropriately 
integrated into long-term planning strategic, policy and planning processes. Conduct research to inform/validate 
inputs, assumptions and scenarios. Conduct market and policy research to identify critical uncertainties and inform
the key inputs, assumptions, and the range of scenarios to be forecasted. Market Intelligence and Assessment: . 
Perform market assessment of supply-side and demand side energy resources for electricity. Analysis: Produce 
analytics that guide PG&E's resource planning, infrastructure planning, strategic planning and/or public policy 
position development. The successful candidate will have a broad understanding of the California utility regulatory
context and PG&E's business model to help contextualize the forecasts. Communicate results to client organizations.
Continuous Improvement: . Drive continuous improvement to forecasting to increase the efficiency, persuasiveness 
and pervasiveness of electric portfolio forecasting. Provide support for the development and implementation of a 
roadmap to systematically identify improvements to the forecasting processes, analytics, inputs and capabilities. 
Research and document forecasting best practices, client use-cases, product specifications, analytical methods, 
processes, and platforms for production and delivery of forecasts. Policy Advocacy: . Help develop analysis and 
testimony, work papers and exhibits to support policy position development with respect to forecasting. Review, 
analyze and respond to interveners testimony, comments, etc. to ensure the best possible outcome. 
QualificationsMinimum:Bachelor's Degree in Economics, Engineering, Statistics, Mathematics, Finance, Business, 
Physics or related disciplines or equivalent work experience2 years of job-related experienceDesired:Advanced 
degree in a relevant discipline Demonstrated experience performing energy-related quantitative analysis, 
probabilistic dispatch modeling, production simulation modeling, technology market or program assessments or 
similar functions Demonstrated project, process or product management experience Three years working at electric or
natural gas utilities, independent power producers, wholesale energy market participants, regulatory agencies, 
consulting firms, or similar organizations Knowledgeable about PG&E's business model, including interdependencies 
and interrelationships of the electric and natural gas industries and supply/demand-side businesses Familiarity 
with California regulatory context for investor-owned utilities Excellent oral and written communication skills, 
including communicating with technical and managerial audiences Demonstrated experience performing advanced Excel 
modeling. Working knowledge of Word and PowerPoint. Prior exposure to data-base management techniques. Working 
knowledge of advanced applications programming: R, SQL, SAS, Stata, Matlab or other data analysis/statistical 
software ."""

    str2 = """
Optum is a global organization that delivers care, aided by technology to help millions of people live healthier lives. The work you do with our team will directly improve health outcomes by connecting people with the care, pharmacy benefits, data and resources they need to feel their best. Here, you will find a culture guided by diversity and inclusion, talented peers, comprehensive benefits and career development opportunities. Come make an impact on the communities we serve as you help us advance health equity on a global scale. Join us to start Caring. Connecting. Growing together.

You’ll enjoy the flexibility to work remotely * from anywhere within the U.S. as you take on some tough challenges. You will be asked to to work in a hybrid set-up.

Hybrid Work Schedule - 2 days/week in Lansing office/3 days/week remote

Potential Work Request Description from the State given prior positions

Data Warehouse Business Intelligence Analyst work with end users to provide information in a number of different ways. This may include tasks such as:

	
Running Joint Application Design (JAD) sessions to understand what data is needed and how it is likely to be used
Creating ad-hoc queries to answer specific user questions
Program design and specification
Creating data warehouse-based applications including reporting, dashboards and other information analysis and delivery systems
Supporting end user to enable them to create their own queries and reports as needed
Identify data problems and work with developers to correct them
Using advanced analytics, provide agencies with predictive and explanatory analysis to enable them to better accomplish their missions
Organize and run user groups to encourage self-support among end user and DTMB query and other BI application developers


Data Warehouse Business Intelligence Analyst are also expected to be able to work with agency requestors to define/develop new ETL code, queries and provide training and technical support to both DTMB developers and end users. Their tasks may include:
	
Working with end user requestors to understand their data needs and create and implement solutions to those needs
Develop and present training classes for end users and developers to help learn how to most effectively use the data warehouse platform and the BI tools available to them to support their own data needs where possible
Develop and present training classes for DTMB developers to better understand how to use the BI development tools to create and implement reporting solution using the data warehouse backend - Teradata
Provide one on one support for developers and end users to assist them in accomplishing their data reporting needs




You’ll be rewarded and recognized for your performance in an environment that will challenge you and give you clear direction on what it takes to succeed in your role as well as provide development for other roles you may be interested in.

Required Qualifications:

Bachelor’s degree or equivalent technical study
6+ years of Business Lead / Business Intelligence Analyst experience
4+ years of creating Data Warehouse BI applications including ETL/data mart development, reporting, dashboards and other information analysis and delivery system
4+ years of ETL and SQL development experience
2+ years of Child Welfare Services, Juvenile Justice, Juvenile services, or other related experience
Local to Lansing, MI, with the ability to work 3 days a week in the office.


Preferred Qualifications:

Experience with the State of Michigan PMM/SUITE/Agile methodology
SQL Assistant/Teradata Studio experience
End user training experience
Experience working with Data Modelers and developers, facilitating modification of the logical design to create a physical design most suited to the reporting needs of the customer
Proven ability to work independently and not rely on the State or other team members
Proven communicate effectively, verbally and in writing, with programmers, analysts, immediate supervisor, management, system users, operations, other State agencies and the private sector
Proven ability to meet with users for clarification and elaboration as may be necessary to clearly define a problem and conceptualize a development plan
Proven ability to recognize, gather, correlate, and analyze facts, draw conclusions, define problems, and devise solutions and alternatives and make appropriate recommendations
Proven ability to design applications taking into consideration data reconciliation, (to the source system) query/reporting performance, and analytical business value


Careers with Optum. Our objective is to make health care simpler and more effective for everyone. With our hands at work across all aspects of health, you can play a role in creating a healthier world, one insight, one connection and one person at a time. We bring together some of the greatest minds and ideas to take health care to its fullest potential, promoting health equity and accessibility. Work with diverse, engaged and high-performing teams to help solve important challenges.

California, Colorado, Connecticut, Nevada, New York, Rhode Island, or Washington Residents Only: The salary range for California, Colorado, Connecticut, Nevada, New York, Rhode Island or Washington residents is $85,000 to $167,300. Pay is based on several factors including but not limited to education, work experience, certifications, etc. In addition to your salary, UnitedHealth Group offers benefits such as, a comprehensive benefits package, incentive and recognition programs, equity stock purchase and 401k contribution (all benefits are subject to eligibility requirements). No matter where or when you begin a career with UnitedHealth Group, you’ll find a far-reaching choice of benefits and incentives.

*All employees working remotely will be required to adhere to UnitedHealth Group’s Telecommuter Policy

At UnitedHealth Group, our mission is to help people live healthier lives and make the health system work better for everyone. We believe everyone–of every race, gender, sexuality, age, location and income–deserves the opportunity to live their healthiest life. Today, however, there are still far too many barriers to good health which are disproportionately experienced by people of color, historically marginalized groups and those with lower incomes. We are committed to mitigating our impact on the environment and enabling and delivering equitable care that addresses health disparities and improves health outcomes — an enterprise priority reflected in our mission.


Diversity creates a healthier atmosphere: UnitedHealth Group is an Equal Employment Opportunity/Affirmative Action employer and all qualified applicants will receive consideration for employment without regard to race, color, religion, sex, age, national origin, protected veteran status, disability status, sexual orientation, gender identity or expression, marital status, genetic information, or any other characteristic protected by law.

UnitedHealth Group is a drug - free workplace. Candidates are required to pass a drug test before beginning employment.


Recommended Skills

Agile Methodology
Backend
Business Intelligence
Child Protection
Communication
Dashboard



LikeAPPLE andBanana

"""
    # fenJu(str2)
    # --- Chinese job-posting samples: ad-hoc fixtures for zh_clear() ---
    # The literals below are runtime data and are kept byte-for-byte intact;
    # uncomment one of the zh_clear(...) calls further down to spot-check one.
    str3 = """工作时间： 上午8：30-12:00
                下午13:00-17.00
                周末双休，国际节假日休息
工作要求：
1.18～35岁均可，学历不限
2.接受无经验，新人上岗前有专业人员培训，包教包会
3.熟悉电脑基础操作，对金融市场有浓厚兴趣，具有较强的学习能力和意愿
4.踏实认真，有进取心，服从公司规章制度
岗位职责：
1.跟随主管了解市场，熟悉工作流程
2.数据交易分析，数据统计
3.电脑办公，时刻紧盯数据变化
薪资条件
2000基本工资加绩效，上不封顶，具体面议"""
    # Sample with mixed Chinese/ASCII tokens (sql, python, base, offer).
    str4 = """工作职责
1  日常数据监控，包括但不限于报表、推送、报告等，数据维护
2  随产品版本跟进埋点测试数据等
3  数据治理，报表搭建，数据验收


要求
1  一周至少可以实习4天（不含周末），需要实习至春节。不限专业，本科以上。工作地点在上海，需要现场实习。
2 熟练掌握sql与python的基本使用
3 有意在数据方面发展
4 细心，有上进心，逻辑思维沟通能力强，数据基础扎实。有相关实习经历优先

base上海，200-300一天
应届生如有转正需要，需要在实习至少满2个月后通过答辩才会有校招offer。

非应届需要能够保证实习时间在6个月以上

若简历通过会有简单的笔试，笔试通过后才会提供面试机会。"""
    # NOTE(review): str5 is bound three times in a row — the first two
    # assignments are dead stores; only the last one survives.
    str5 = """疫情过后，选择大于努力。大胆来尝试人生有很多种可能，只要勇敢一次，往往能得到的东西能比想象中多得多，譬如说现在，我们招人了，你勇敢投简历了吗?   公司2015年成立，目前上海五家分公司，公司人员基本90后00后，公司提供精准客户不打白名单！不打白名单！ 1. 薪资待遇底薪9000，高提成+高奖金+年奖+季度奖， 2. 平均工资3-5W 销售冠军10w+招聘我是认真的，谈恋爱都没有这么认真过 请温柔以待我的面试邀约1 .想赚钱就加入我们，赚钱我们是认真的 接受无经验小白！岗前有培训 三个月晋升主管！！六个月晋升经理！ 年薪百万不是梦！！！薪资待遇1.融资顾问：底薪（底薪8000-9000）+提成+年终奖金+季度奖，人均月薪3.5万且包括新人，前20名人均6万以上，前10名10万以上，前三20万以上（单月工资，集团优秀基层员工月薪百万）2.工作时间9:00-6:00，周六休息，按照国家法定节假日作息，带薪年假；3.新员工可享受入职培训、专业培训、营销技能培训，促进员工持续提升；4.公司为能力优秀者提供晋升空间最快三个月晋升，最快10个月小白晋升区长。5.每周都有下午茶，每周不定期活动，团建，老板待人好，6.公司提供精准客户，7.公司是舒适的办公环境，5A级写字办公楼，门口就是地铁，周围商场环境好。8.欢迎您的加入，共同发展。9.团队积极向上，活力十足，工作轻松，开心愉快。10.团队年轻化，颜值高，形象佳。职位福利：弹性工作、绩效奖金、五险一金职位亮点：高提成 高奖金 晋升快 五险一金"""
    str5 = """YMG全球顾问集团上海总部招聘日本移民资深销售经理。   我们需要的资深顾问是这样的： 1）2年以上移民行业从业经验 2）有较强的分析逻辑力，有较强的赚钱欲望，有坚韧不拔的性格 3）对于销售有自己的一定理解，谈吐稳重、高雅，具备优秀的沟通能力、销售技巧 4）与移民潜在客户进行电话沟通或面对面咨询，详细解答客户关于海外移民的各种问题 5）积极配合公司安排，利用好公司提供的资源，累积专属自己的客户，签约 6）有海外留学经历，美国及加拿大房产销售经验者优先考虑   薪资待遇： 1. 薪酬结构：底薪+绩效+年终奖； 2. 五险一金、月度绩效； 3. 带薪年假； 4. 年度体检； 5. 年度海外旅游。职位福利：五险一金、绩效奖金、全勤奖、带薪年假、年底双薪"""
    str5 = "公募基金科技运营支持岗 岗位职责: 1. 协调IT资源,辅助建设和支持公募业务数据中心。对公募外包各相关系统的数据归集整合和处理,并监控数据健康度; 2. 支持信息披露、监管报送、数据比对等业务,必要时参与数据库层面分析和排错; 3. 日常数据中心生成各类数据,并核查比对; 4. 负责各类数据业务测试业务; 5. 部门交办的其他科技运营相关工作。 任职要求: 1. 本科以上学历; 2. 1年以上金融行业从业经验优先; 3. 1年以上产品代销、管理、托管机构清算业务从业经验优先; 4. 熟悉数据库逻辑或具备资管行业开发经验优先; 5. 具备证券从业资格、基金从业资格或者于限定时间内完成资格考试; 6. 抗压力强、学习能力强、沟通能力强。职位福利:周末双休"
    str6 = "1.按照量化CTA投资经理的策略思路进行代码编译工作2.负责按照要求对现有量化CTA策略的因子改造与优化工作；3.参与CTA量化策略的研发、生成可行的盈利策略，具体涉及商品期货全品种交易想法的策略化、数据处理、研报复现等；4.完成上级领导交给的其它工作。岗位需求：1.***硕士及以上学历，理工科或统计学、数量经济学、金融数学、金融工程背景；2.有较强的逻辑思维能力、学习能力及数据处理能力；3.可熟练使用python语言，掌握matplotlib、pandas、sql等第三⽅库；4.有商品期货全品种量化策略开发经验及实盘经验者优先；5.有使用过wind宏观数据库进行期货基本面数据清洗与过滤者优先；6.具有较强的责任心，事业心，工作勤奋踏实，认真细致，有条理，理解和沟通能力强；7.对金融行业有兴趣，并有志长期发展。职位福利：五险一金、餐补、补充医疗保险、定期体检、员工旅游、节日福利、周末双休、定期团建"
    str7 = "工作地址上丰路1288号三号门。 1.通知银行逾期客户还款，各个阶段都有岗位，适合催收小白和大咖2.五险一金，五百强公司正式员工，享受各项福利待遇，过节费，激励费，高温费，加班费，生日福利，免费班车，公司有内部食堂近百菜色可供选择，为员工免费购买商业保险大病有保险，小病公司医务室免费看病配药，每三个月调整一次底薪，工龄越久底薪越高，收入上不封顶。3.催收群体是平安银行自己的客户，客群质量好，容易催收4.期待对于收入和职业发展有要求的有识之士加入5.工作时间比较自由，做六休一，早上九点到晚七，当日工作完成后可以提前下班。小组业绩提早达标了就一起下班了。拒绝无意义熬时间的加班。常规岗位薪资M1新手小白；7000～10000M2业务熟练；10000～15000M3+资深业务；看能力无上限提奖方式为回收金额百分比提成制度，催回多少就有多少的提成，组内无需竞争排名不内卷。入司后前三个月底薪5000包含1250加班费。入司每三月调整一次底薪，前三次加薪很容易，之后调薪看绩效是否达标。岗位要求1.学历高中及以上2.性格开朗，表达能力强，口齿清晰。3.有客服，催收经验优先。职位福利：五险一金、绩效奖金、加班补助、带薪年假、定期体检、节日福利、高温补贴、3千新人补贴"
    str8 = "工作职责：  ①负责该项目的现场管理工作；  ②负责组织维护作业计划执行，组织故障处理、应急演练等工作；  ③负责与甲方协调、沟通，协调各项工作正常开展；  ④负责对乙方驻场团队的考核、培训等；  ⑤辅助工程随工、验收、调试工作的组织和协调工作。  协调供电、供水、安监等当地政府监管部门，保障园区工作正常开展  任职要求：  1.5年以上数据中心管理经验  2.大专以上学历职位福利：五险一金、补充医疗保险、带薪年假、项目奖金、周末双休、房补、包住、年底双薪"
    str9 = "1.职责描述： 1.1带领团队达成部门年度任务目标，分解年度和季度研究任务; 1.2积极参与客户拓展与客户关系维护等工作，制定并安排好日常客户拜访，挖掘市场机会; 1.3负责客户需求沟通，并根据客户需求独立撰写项目方案和预算； 1.4负责项目投标、提案和项目谈判； 1.5负责安排下属根据项目方案拟定调研问卷、内部执行计划书、访问指南、审卷指南、编码表、数据报告格式和数据分析等项目资料； 1.6负责项目实施及人员安排，带领团队控制好项目质量、成本、项目进度； 1.7独自或与下属研究人员撰写行业高水平的、专业的、自己极度满意的市场研究报告； 1.8协助或独立解答客户质疑，提高客户满意度； 1.9独立或安排下属进行专业性的最终的成果汇报或陈述； 1.10负责部门规划和团队建设，包括员工队伍建设，员工发展计划，员工培训和绩效考核等，激励和带领团队完成研究任务； 1.11领导安排的其它任务。  2.任职资格： 2.1硕士及以上学历，社会学、统计学、应用数学、管理学等专业优先; 2.2具有3年以上的知名市场研究公司相关工作经验，1年以上同岗位或者类似岗位的工作经验； 2.3在房地产项目、政府及公共事务项目、快消类项目等研究方面具有成功管理经验者优先。  3.能力素质： 3.1熟悉定性和定量研究的基本流程和方法； 3.2学习能力强，逻辑思维强，擅长撰写报告和文件专业制作(WORD或PPT等格式)； 3.3出色的语言表达能力，擅长报告陈述； 3.4对市场研究行业有浓厚的兴趣和高度热情； 3.5诚实守信、积极主动、性格开朗、讲求效率、乐于接受挑战； 3.6善于自我激励并能够激发团队士气。  4.待遇和发展 4.1公司为本岗位提供极具竞争力的待遇和极好的发展平台; 4.2此岗位为公司核心管理岗位之一; 4.3此岗位将向合伙人发展，合伙人将享有分红并配有股权。"
    str10 = "1、负责项目技术资料的编制、整理、审核、归档、提交工作; 2、负责公司内外部和项目有关的报表数据采集及制作; 3、负责项目的跟踪和完善工作 4、协助部门领导完成其他工作。 岗位要求: 1、3年以上相关项目、销售相关工作经验; 2、熟悉体系认证相关流程的信息; 3、工作有责任心,抗压能力强。职位福利:带薪年假、周末双休、高温补贴、绩效奖金、员工公寓、家人同住、包住、五险一金"
    str11 = "岗位职责:1. 在团队负责人带领下完成指定的管理咨询任务;2. 执行既定的咨询实施方案;3. 执行资料研读、与客户的访谈、业务穿行测试等必要的咨询工作程序,调查了解客户需求与管理问题;4. 负责相关数据资料的搜集整理和分析,提出管理建议;5. 参与咨询报告或建议报告或解决方案等章节的编写;6. 按团队负责人的要求开展其他相关工作。8. 全面负责项目的进度、质量、成本和风险控制;9. 率领团队执行资料研读、与客户的访谈、业务穿行测试等必要的咨询工作程序,调查了解客户需求与管理问题;10. 设计和组织相关数据资料的搜集整理和分析;11. 编写咨询报告、建议报告或解决方案等;12. 根据确定好的报告或解决方案,培训与辅导客户实施;13. 负责催收回款等商务事宜,以及团队内部管理与考核;14. 按上级要求开展其他相关工作。 学历与经验要求:1. 本科及以上学历,审计、咨询、合规管理工作经验至少2年以上,或同类工作经验1年以上;2. 专业不限;法学相关专业优先;3. 拥有法律资格证书、注册咨询工程师投资、注册管理咨询师、注册会计师、国际注册内审师、风险管理师、注册内控师等资质证书者优先;4. 具有MBA或者管理类硕士以上学历者优先。任职条件:1. 工作细致、认真负责、有良好的执行力及职业素养;2. 具备优秀的团队组织能力及沟通协调能力,可适应经常性或长期性出差;3. 学习能力强,能够在短时间内掌握工作方法和要点,独立完成各项工作。熟练操作电脑等办公设备,常用办公软件word、excel、visio、powerpoint能熟练应用。工作年限:3-5年最低学历:本科职位福利:年底双薪、五险一金、绩效奖金、高温补贴、带薪年假、周末双休、餐补、出差补贴"
    str12 = "岗位职责1.负责国家相关政策的收集、整理和分析;2.从事钢铁产业绿色发展现状及趋势研究等工作;3.对企业绿色发展现状进行诊断;4.能够及时完成领导交办的相关工作,为部门及单位提供相关支持。 报名条件, 具备研究生及以上学历,冶金、材料、环保等相关专业;, 有3年以上冶金行业节能减排、绿色发展研究工作经验;, 积极上进,工作踏实,责任心强,具有较强的团队合作精神。职位福利:绩效奖金、餐补、采暖补贴、带薪年假、房补、定期体检、交通补助、五险一金"
    str13 = "岗位职责: 作为行业研究团队成员,参与金属大宗商品行业的分析研究工作: 1.长期跟踪国内外行业市场动态,关注实时热点并进行分析解读。 2.完成日常电话调研工作,采集一手调研数据信息,与采标单位、目标调研企业保持长期、良好的沟通关系。 3.掌握行业研究方法论,研究行业基本面、宏观环境、发展趋势等,撰写分析/点评内容。 4.完成数据整理、分析与利用,定期撰写深度行业研究报告。 5.参与行业数据库的优化升级,完成数据内容的及时更新。 6.完成其他任务,不限于维护行业关系、路演交流、发言、咨询项目等。岗位培养:专属你的培养晋升路径 1.专业导师一对一带教。 2.晋升路径: 助理分析师-分析师-高级分析师-资深分析师-行业专家 助理分析师-分析师-高级分析师-部门主管/经理-部门总监 任职要求: 1.本科及以上学历,金融、经济等相关专业,欢迎优秀应届毕业生。 2.具备优秀的逻辑分析、快速学习能力,有一定的行业分析思维。 3.对市场研究有兴趣,CET-6,有良好的文字功底,熟练使用办公软件。 4.具备较好的沟通能力、抗压能力,善于团队协作。 5.性格开朗、乐于交流,能够很好地维护信息渠道关系。 薪酬福利: 五险一金、补充医疗保险+节日福利、定期体检、带薪年假、带薪病假职位福利:五险一金、带薪年假、补充医疗保险、定期体检、周末双休、13薪"
    # Uncomment exactly one call to run zh_clear over a single sample string.
    # zh_clear(cleaned_text=str3)
    # zh_clear(cleaned_text=str4)
    # zh_clear(cleaned_text=str5)
    # zh_clear(cleaned_text=str7)
    # zh_clear(cleaned_text=str8)
    # zh_clear(cleaned_text=str9)
    # zh_clear(cleaned_text=str10)
    # zh_clear(cleaned_text=str11)
    # zh_clear(cleaned_text=str12)
    # zh_clear(cleaned_text=str13)

    # resp = clean_cleaned_text(str1)
    # print(resp)
    # Clean every sheet of the aggregated spreadsheet: sheets whose name
    # contains Chinese characters are treated as Chinese job sites and run
    # through zh_clear; the English-site branch is not implemented yet.
    source_file = '../Data/数据汇总.xlsx'
    path_ = "../Data/数据清洗/"
    # makedirs(..., exist_ok=True) replaces the exists()/mkdir() pair:
    # no check-then-create race, and missing parent dirs are created too.
    os.makedirs(path_, exist_ok=True)
    # Open the workbook once via pandas; the original parsed the file twice
    # (openpyxl.load_workbook for sheet names + a fresh pd.read_excel per sheet).
    workbook = pd.ExcelFile(source_file)
    # Raw string + compiled once outside the loop; matches any CJK unified
    # ideograph, i.e. the sheet is named after a Chinese site.
    zh_pattern = re.compile(r'[\u4e00-\u9fa5]')
    for sheet_name in workbook.sheet_names:
        save_path = path_ + f"{sheet_name}.csv"  # TODO(review): cleaned output is never written yet
        print(f"当前网站: {sheet_name}")
        df = pd.read_excel(workbook, sheet_name=sheet_name, index_col=0)
        if zh_pattern.search(sheet_name):  # Chinese site: clean the job-requirement column
            for requirement in df["任职要求"]:
                zh_clear(requirement)
        else:  # English site: cleaner not wired up yet
            # df = EnDataClear(data_frame=df).main_()
            pass
